clear
clear matrix
set more off

* ------------------------------------------------------------------------------
* set paths
global root "[include path here]"
global input "$root/Input"
global output "$root/Output"
global intermediate "$root/Intermediate"
global figures "$root/Figures"

* ______________________________________________________________________________
* REGRESSIONS

use "$input/eventStudyPanel_matched.dta", clear

* define local fixed effect specification variables
local fe_pmFEymFE   "i.pid#i.month i.year#i.month"
local fe "pmFEymFE"

foreach year of numlist 1998/2014 {
display " - - - - - - - - - matchid_`year' - - - - - - - - - "
use "$intermediate/matchedsample_parcels_`year'.dta", clear
	// these files are created by creating the matched panel
duplicates tag pid, gen(parcelfreq_year)
replace parcelfreq_year = parcelfreq_year + 1
duplicates drop pid, force
sort pid
save "$intermediate/temp_fparcel.dta", replace

use "$input/eventStudyPanel_matched.dta", clear
drop _merge
merge m:1 pid using "$intermediate/temp_fparcel.dta"
keep if _merge==3 & matchid_`year' == 1
sort pid year month

reghdfe wuse chi pre [fweight=parcelfreq_year], a(`fe_`fe'') pool(1) vce(cl pid)
	display "command line: `e(cmdline)'"
	local regid "`fe'`year'"
	estimates save "$output/didmatch_`regid'.ster", replace
	
	preserve // PARTICIPATION PLOTS
	keep if e(sample)
	duplicates drop pid, force
		
	tabstat conv_sqft, stat(n mean sd min max) by(event_month) columns(statistics) f(%9.0gc)
	collapse (count) nconv = conv_sqft nrebate = tot_rebated (mean) meanconv = conv_sqft  ///
	(sd) sdconv = conv_sqft (semean) seconv = conv_sqft (min) minconv=conv_sqft (max) maxconv = conv_sqft, by(event_year)
	sort event_year
	save "$output/didmatch_`regid'_conversions.dta", replace
	restore

}
rm "$intermediate/temp_fparcel.dta"


****** interaction: year parcel-month (chi-by-preTreatment consumption [ptc])
** pre-treatment (i.e. baseline) consumption decile analysis

* define annual pre-treatment consumption using 12 x n monthly average (starting 1 year out)
use "$input/eventStudyPanel_matched.dta", clear
foreach n of numlist 2/5 {
	local mintau = -12*`n'
	by pid: egen ptc`n'_nrm = mean(wuse_norm) if inrange(tau,`mintau',-13)
	}
keep if conversions == 1
keep if tau == -13
duplicates report pid

* define deciles of pre-treatment consumption decile (ptcd) for each 'n' (2 though 5)
foreach n of numlist 2/5 {
	xtile ptcd`n'_nrm = ptc`n'_nrm, nquantiles(10)
	la var ptcd`n'_nrm "pre-treatment (t-`n' to t-1 yr) average normalized consumption decile"
	_pctile ptc`n'_nrm, percentiles(10(10)90)
	display "return normalized _pctile list: `n'"
	return list // lists maximum of the nth decile: r(r1) = x means the top of the 0-10th decile
	}
keep pid ptcd* 
sort pid 
la data "temporary file: pre-converting water use decile category"
save "$intermediate/tempmatch_ptcd.dta", replace

* include pre-convert consumption decile definitions back into matched sample
use "$input/eventStudyPanel_matched.dta", clear
tab conversions, missing
sort pid year month
merge m:1 pid using "$intermediate/tempmatch_ptcd.dta" 
sort pid year month
tab conversions _merge // sanity check
drop _merge

rm "$intermediate/tempmatch_ptcd.dta"
sort pid year month
save "$intermediate/temp_matchedMOD.dta", replace // matched sample with pre-convert consumption decile definitions


* define conversion pre-conversion decile indicators and create chi x indicator interactions
foreach n of numlist 2/5 {
	foreach d of numlist 1/10 {
		gen ptcd`n'dec`d'_nrm = 0
		replace ptcd`n'dec`d'_nrm = 1 if ptcd`n'_nrm == `d'
		gen chiBYptcd`n'd`d'_nrm = chi * ptcd`n'dec`d'_nrm
		drop ptcd`n'dec`d'_nrm
		}
	}

** alternative regression
foreach n of numlist 2/5 {
foreach d of numlist 1/10 {
	// normalized regression: prep
	display "ptcd`n'_nrm and decile `d': pre-regression stuff ----------------------"
	use "$intermediate/temp_matchedMOD.dta", clear // pull non-participant matched parcels for each decile
	keep if ptcd`n'_nrm == `d'
	duplicates drop pid, force
	sort pid
	merge 1:1 pid using "$intermediate/matchedparcels.dta"
	tab ptcd`n'_nrm _merge, m
	keep if _merge==3

	preserve // keep only list of enrolling parcels
	  keep pid 
	  gen enroll = 1
	  save "$intermediate/temp_enrolled.dta", replace
	restore

	keep matchparcel // keep only list of matched parcels and append into enrolled parcel list
	duplicates report matchparcel
	rename matchparcel pid
	append using "$intermediate/temp_enrolled.dta"
	
	duplicates report pid // generate matched parcel frequency use for fweights in regression
	duplicates tag pid, gen(parcelfreq_dec)
	replace parcelfreq_dec = parcelfreq_dec + 1
	duplicates drop pid, force
	sort pid
	save "$intermediate/temp_parcels_dec.dta", replace

	use "$input/eventStudyPanel_matched.dta", clear 
	// merge decile list with sample, and keep obs that match with decile list
	merge m:1 pid using "$intermediate/temp_parcels_dec.dta"
	keep if _merge == 3
	drop _merge

	// ** normalized regressions ** 
	display "ptcd`n'_nrm and decile `d': normalized regression ----------------------"
	reghdfe wuse_norm chi pre [fweight=parcelfreq_dec], a(`fe_`fe'') pool(1) vce(cl pid)
	local regid "normQ`fe'ptcd`n'_d`d'"
	estimates save "$output/didmatch_`regid'.ster", replace
	
	preserve
	  keep if e(sample)
	  duplicates drop pid, force
	  collapse (count) nconv = conv_sqft nrebate = tot_rebated (mean) meanlot = lotsize meanconv = conv_sqft  ///
	    (sd) sdconv = conv_sqft (semean) seconv = conv_sqft (min) minconv=conv_sqft (max) maxconv = conv_sqft
	  save "$output/didmatch_`regid'_conversions.dta", replace
	restore	
        }
}
rm "$intermediate/temp_enrolled.dta"
rm "$intermediate/temp_parcels_dec.dta"
rm "$intermediate/temp_matchedMOD.dta"

* ______________________________________________________________________________
* CREATE FIGURES
clear
set more off

* ==============================================================================
* figure 4b
* ==============================================================================
clear
foreach yr of numlist 1998/2014 {
	estimates use "$output/didmatch_pmFEymFE`yr'.ster" 
	mat result`yr' = e(b)', vecdiag(e(V))'
	svmat result`yr', names(matcol)
	keep if _n == 1
	}

expand 17
gen event_year = _n + 1997
gen beta = 0
gen r1 = 0
foreach yr of numlist 1998/2014 {
	replace beta = result`yr'y1 if event_year == `yr'
	replace r1 = result`yr'r1 if event_year == `yr'
	}
gen rse = sqrt(r1)
order event_year beta rse
drop r1 result*

foreach yr of numlist 1998/2014 {
merge 1:1 event_year using "$output/didmatch_pmFEymFE`yr'_conversions.dta", ///
 update keepusing(nconv nrebate meanconv minconv maxconv)
rename _merge _merge`yr'
} 
drop if event_year == .
drop _merge*

* generate plot variables
gen unitsave = beta*-1000*12/meanconv
gen unitSE = ((12*1000)/meanconv)*rse
gen unitsave_lb = unitsave - 1.96*unitSE
gen unitsave_ub = unitsave + 1.96*unitSE

gen snwa1 = 55.8
gen snwa2 = 54.7

* graph of savings and unit savings

gr tw sc unitsave unitsave_lb unitsave_ub snwa1 snwa2 event_year ///
if event_year>=2000, c(l l l l l) lpattern(solid dash dash shortdash shortdash) ///
lc(gs0 gs7 gs7 gs10 gs10) m(O oh oh i i) mc(gs0 gs7 gs7 gs10 gs10) ///
ytitle("water savings [gal/ft{sup:2}/year]") xtitle("") xlabel(2000(1)2014) /// 
ylabel(, nogrid) graphregion(color(white)) xsize(8) ysize(5) legend(off)
graph export "$figures/figure_4b.pdf", replace


* ==============================================================================
* figure 5b (the paper used the ptcd2 version)
* ==============================================================================
local xtiptcd "pre-enrollment consumption decile"
set more off
foreach n of numlist 2/5 {
  local mintau = -12*`n'
  foreach d of numlist 1/10 {
    clear
    * place coefficient estimates in stata data file
	estimates use "$output/didmatch_normQpmFEymFEptcd`n'_d`d'.ster"
	mat result = e(b)', vecdiag(e(V))'

	svmat result, names(matcol)
	keep if _n == 1
	gen rse = sqrt(resultr1)
	gen ci_lb = resulty1 - 1.96*rse
	gen ci_ub = resulty1 + 1.96*rse

	gen decile = `d'
	rename resulty1 beta
	rename resultr1 diag_vcm

	* merge coefficient estimates with conversion size summary data file
	preserve
	  use "$output/didmatch_normQpmFEymFEptcd`n'_d`d'_conversions.dta", clear
	  gen decile = `d'
	  save "$intermediate/temp.dta", replace
	restore
  
	merge 1:1 decile using "$intermediate/temp.dta"
	keep if _merge==3
	drop _merge
	sort decile

	save "$intermediate/temp_d`d'.dta", replace
	}
	
  * append decile files
  use "$intermediate/temp_d1.dta", clear
  foreach d of numlist 2/10 {
	append using "$intermediate/temp_d`d'.dta"
	rm "$intermediate/temp_d`d'.dta"
	}
	rm "$intermediate/temp.dta"
  sort decile
  order decile beta ci_lb ci_ub meanconv

  * generate plot variables
  gen savings = -1*beta
  gen savings_rse = rse
  gen savelb = savings - 1.96 * savings_rse
  gen saveub = savings + 1.96 * savings_rse

* graph of savings
gr tw sc savings savelb saveub decile, ///
c(l l l) lpattern(solid dash dash) lc(gs0 gs7 gs7) ///
m(O oh oh) mc(gs0 gs7 gs7) xtitle("`xtiptcd'") xlabel(1(1)10) ///
ytitle("normalized average water savings [gal/ft{sup:2}-month]") /// 
ylabel(0(.2)1, nogrid) graphregion(color(white)) xsize(8) ysize(5) legend(off)
graph export "$figures/figure_5b_ptcd`n'.pdf", replace
}
rm "$dtaFiles/temp_d1.dta" 